This report examines the impact of major disruptions—including the COVID-19 pandemic, significant public events, and extreme weather—on train usage patterns in Sydney between 2020 and 2025. Using Opal card data and Bureau of Meteorology (BOM) records, we analyze changes in passenger volumes, identify key trends, and compare the magnitude and duration of different types of disruptions.
Below, we analyze the impact of COVID-19 on Sydney train station usage using Opal data. The code and analysis are directly adapted from T1.qmd, with each code block preceded by a description of its purpose.
# DATA PREP
# ignore closed stations
stations_to_ignore <- c("Rosehill", "Camellia", "Rydalmere", "Dundas", "Telopea", "Carlingford")
ee <- ee %>%
filter(Station_Type %in% c("Train", "Metro Shared")) %>%
mutate(
Train_Station = gsub(" Station", "", Station),
Train_Station = gsub(" $", "", Train_Station),
TripNumber = as.numeric(ifelse(Trip == "Less than 50", 50, Trip)),
MonthYear = as.Date(paste0(MonthYear, "-01"))
) %>%
filter(!Train_Station %in% stations_to_ignore) %>%
mutate(
Phase = case_when(
MonthYear >= as.Date("2020-01-01") & MonthYear <= as.Date("2021-06-30") ~ "Early COVID",
MonthYear >= as.Date("2021-07-01") & MonthYear <= as.Date("2021-12-31") ~ "Delta COVID",
MonthYear >= as.Date("2022-01-01") ~ "Post COVID",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(Phase))
# load and merge station coordinates
stations <- stations %>%
filter(!duplicated(Train_Station)) %>%
filter(Train_Station %in% ee$Train_Station)
ee <- ee %>%
left_join(stations %>%
select(Train_Station, LAT, LONG), by = "Train_Station") %>%
filter(!is.na(LAT) & !is.na(LONG))
ee_sf <- st_as_sf(ee, coords = c("LONG", "LAT"), crs = 4326)
# load route shapefile
trains <- st_read(shp_path, quiet = TRUE) %>% st_transform(crs = 4326)
# summarize
station_phase_totals <- ee_sf %>%
st_drop_geometry() %>%
group_by(Train_Station, Phase) %>%
summarise(
Total_Entries = sum(TripNumber[Entry_Exit == "Entry"], na.rm = TRUE),
Total_Exits = sum(TripNumber[Entry_Exit == "Exit"], na.rm = TRUE),
.groups = "drop"
)
# % drop and recovery + AVG
station_trends <- station_phase_totals %>%
pivot_wider(
names_from = Phase,
values_from = c(Total_Entries, Total_Exits),
names_glue = "{.value}_{gsub(' ', '_', Phase)}"
) %>%
mutate(
Entry_Drop_Pct = round((Total_Entries_Early_COVID - Total_Entries_Delta_COVID) / Total_Entries_Early_COVID * 100, 1),
Entry_Recovery_Pct = round((Total_Entries_Post_COVID - Total_Entries_Delta_COVID) / Total_Entries_Delta_COVID * 100, 1),
Exit_Drop_Pct = round((Total_Exits_Early_COVID - Total_Exits_Delta_COVID) / Total_Exits_Early_COVID * 100, 1),
Exit_Recovery_Pct = round((Total_Exits_Post_COVID - Total_Exits_Delta_COVID) / Total_Exits_Delta_COVID * 100, 1),
Avg_Drop_Pct = round((Entry_Drop_Pct + Exit_Drop_Pct) / 2, 1),
Avg_Recovery_Pct = round((Entry_Recovery_Pct + Exit_Recovery_Pct) / 2, 1)
)
This code generates a small bar chart for each station (as a tooltip), showing entries and exits by phase, and combines this with drop/recovery statistics for use in an interactive map.
# embedded bar chart tooltip
create_station_plot_base64 <- function(station_name) {
station_data <- station_phase_totals %>% filter(Train_Station == station_name)
station_data$Phase <- factor(station_data$Phase, levels = c("Early COVID", "Delta COVID", "Post COVID"))
long_data <- station_data %>%
pivot_longer(cols = c(Total_Entries, Total_Exits),
names_to = "Type", values_to = "Count") %>%
mutate(Type = ifelse(Type == "Total_Entries", "Entries", "Exits"))
p <- ggplot(long_data, aes(x = Phase, y = Count, fill = Type)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = Count), position = position_dodge(0.9), vjust = -0.7, size = 3.2) +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
labs(title = station_name) +
theme_minimal() +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1),
plot.margin = margin(10, 10, 10, 10)
)
file_path <- tempfile(fileext = ".png")
ggsave(file_path, plot = p, width = 4, height = 3.5, dpi = 150)
encoded <- base64enc::dataURI(file = file_path, mime = "image/png")
return(paste0("<img src='", encoded, "' width='260'>"))
}
# tooltip: embed chart + averaged % drop/recovery info
tooltip_df <- station_trends %>%
mutate(
chart_img = sapply(Train_Station, create_station_plot_base64),
tooltip = paste0(
chart_img, "<br>",
"<b>Drop & Recovery:</b><br>",
"Average Drop (Early → Delta): ", Avg_Drop_Pct, "%<br>",
"Average Recovery (Delta → Post): ", Avg_Recovery_Pct, "%"
)
)
# df with spatial + tooltips
df <- ee_sf %>%
filter(Entry_Exit == "Entry") %>%
distinct(Train_Station, .keep_all = TRUE) %>%
left_join(tooltip_df, by = "Train_Station")
This code defines a function to plot all stations on a map, color-coded by route, with interactive tooltips showing drop and recovery statistics and a bar chart for each station. The map supports zooming.
# plot func with zoom
plot_all_phases <- function() {
trains_proj <- st_transform(trains, crs = 3857)
sydney_region <- ne_states(country = "Australia", returnclass = "sf") %>% filter(name_en == "New South Wales")
sydney_proj <- st_transform(sydney_region, crs = 3857)
df_proj <- st_transform(df, crs = 3857)
bbox_syd <- st_bbox(c(xmin = 150.5, xmax = 151.35, ymin = -34.15, ymax = -33.35), crs = 4326)
bbox_proj <- st_transform(st_as_sfc(bbox_syd), crs = 3857)
gg <- ggplot() +
geom_sf(data = sydney_proj, fill = "gray95", color = "gray85") +
geom_sf(data = trains_proj, aes(color = route_shor), size = 0.6) +
geom_point_interactive(
data = df_proj,
aes(geometry = geometry, tooltip = tooltip),
stat = "sf_coordinates",
size = 1.4,
colour = "darkred"
) +
coord_sf(xlim = st_bbox(bbox_proj)[c("xmin", "xmax")],
ylim = st_bbox(bbox_proj)[c("ymin", "ymax")]) +
ggtitle("Sydney Train Station Usage – COVID Phases") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16))
girafe(
ggobj = gg,
options = list(
opts_tooltip(css = "background-color:white; color:#00274D; font-size:10px; padding:5px;"),
opts_zoom(max = 5)
)
)
}
plot_all_phases()